II. Data Exploration

In [11]:
from plotly import graph_objs as go
# from plotly import version
# print(version)
In [12]:
def create_stack_bar_data(col, df):
    aggregated = df[col].value_counts().sort_index()
    x_values = aggregated.index.tolist()
    y_values = aggregated.values.tolist()
    return x_values, y_values

x1, y1 = create_stack_bar_data('an', accidents)

for i in range(len(x1)):
    x1[i] += 2000

#x1 = x1[:-1]
#y1 = y1[:-1]
color1 = ['red']*4
color2 = ['green']*5
color1.extend(color2)

trace1 = go.Bar(x=x1, y=y1, opacity=0.75, name="year count", marker = dict(color = color1))
layout = dict(height=400, title='Year wise Number of Accidents in France', legend=dict(orientation="h"), 
              xaxis = dict(title = 'Year'), yaxis = dict(title = 'Number of Accidents'))
fig = go.Figure(data=[trace1], layout=layout);
iplot(fig);
In [13]:
accidents.an += 2000
dates = pd.to_datetime(accidents.an*10000+accidents.mois*100+accidents.jour,format='%Y%m%d')
accidents.an -= 2000
traces = []
for key, grp in dates.groupby(dates.dt.year):
    #print(grp)
    aggregated = grp.dt.month.value_counts().sort_index()
    x_values = aggregated.index.tolist()
    y_values = aggregated.values.tolist()
    x1,y1 = x_values, y_values
    x1 = [calendar.month_name[int(x)] for x in (x1)]
    

#x1, y1 = create_stack_bar_data('jour', accidents)
    trace1 = go.Scatter(x=x1, y=y1, opacity=0.75, line = dict(
        width = 1.5), name = str(key), marker = dict(color = np.random.randn(500)*key), mode = 'lines', 
                       text = str(key))
    layout = dict(height=400, title='Time Series of Accidents for each Year', legend=dict(orientation="h"));
    traces.append(trace1)
fig = go.Figure(data= traces, layout=layout)
iplot(fig, filename='stacked-bar')
In [14]:
x1, y1 = create_stack_bar_data('catr', accidents)
x1 = ['Highway', 'National Road', 'Departmental Road', 'Communal Way', 'Off-Public Network', 'Parking Lot', 'Other']
trace1 = go.Bar(x=x1, y=y1, opacity=0.75, name="Category", marker=dict(color='blue'))

x2, y2 = create_stack_bar_data('circ', accidents)
x2 = ['Unknown','One Way', 'Bidirectional', 'Separated Carriageways', 'Variable Assignment Channels']
trace2 = go.Bar(x = x2, y = y2, opacity = 0.75, marker=dict(color='green'), name = "Traffic Flow")

x3, y3 = create_stack_bar_data('prof', accidents)
x3 = ['Unknown', 'Dish','Slope', 'Hill-Top', 'Hill-Bottom']
trace3 = go.Bar(x = x3, y = y3, opacity = 0.75, marker=dict(color='red'), name = "Road Gradient")

fig = tools.make_subplots(rows = 3, cols = 1)

fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 2, 1)
fig.append_trace(trace3, 3, 1)
layout = dict(height=900, title='Accidents by Type of Road');
fig.layout.update(layout)
#fig['layout'].update(height=800,title='Accidents by Type of Road')
iplot(fig, filename='stacked-bar')
This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]
[ (3,1) x3,y3 ]

In [15]:
ageusers = accidents[['an_nais', 'catu']]
ageusers['age'] = 2016 - ageusers.an_nais

keydict = {1:'Driver', 2:'Passenger', 3:'Pedestrian', 4:'Pedestrian in Motion'}
traces = []
for key, grp in ageusers.groupby(ageusers.catu):
    if(key < 4):
    #aggregated = grp.an_nais.value_counts().sort_index()
        x1 = grp.age.values
    #y_values = aggregated.values.tolist()
    #x1,y1 = x_values, y_values
    #x1 = ['Driver','Passenger', 'Pedestrian', 'Pedestrian in Motion']

    
#x1, y1 = create_stack_bar_data('jour', accidents)
        trace1 = go.Histogram(x=x1, opacity=0.5, name = keydict[key], marker = dict(color = '#34d5e4'))
        layout = dict(height=400, title='Distribution of People involved in Accidents by Age', 
                  legend=dict(orientation="h"), barmode = 'overlay');
        traces.append(trace1)
fig = go.Figure(data= traces, layout=layout)
iplot(fig)
C:\Users\devan\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [16]:
safety = accidents[['secu', 'grav']]
safety = safety.dropna()
safety['equipment'] = (safety.secu/10).astype(int)
safety.secu = (safety.secu - safety.equipment*10).astype(int)

x1, y1 = create_stack_bar_data('equipment', safety)
x1 = ['Belt', 'Helmet', "Children's Device", 'Reflective Equipment', "Other"]
trace1 = go.Bar(x=x1, y=y1, opacity=0.75, marker=dict(color='blue'))
layout = dict(height=400, title='Distribution of Safety Equipment', legend=dict(orientation="h") );

fig = go.Figure(data=[trace1], layout=layout)
iplot(fig, filename='stacked-bar')
In [17]:
keydict = {1:'Unscathed', 2:'Killed', 3: 'Hospitalized', 4: 'Light Injury'}
traces = []
for key, grp in safety.groupby(safety.grav):
    if (key != 0):
        count = safety.secu.count()
        #print(count)
        aggregated = (grp.secu.value_counts()).sort_index()
        x_values = aggregated.index.tolist()
        y_values = (aggregated.values/safety.secu.value_counts().sort_index().values*100).tolist()
        x1,y1 = x_values[1:], y_values[1:]
        x1 = ['Equipment Present','Equipment Absent', 'Not Determined']

#x1, y1 = create_stack_bar_data('jour', accidents)
        trace1 = go.Bar(x=x1, y=y1, opacity=0.75, name = keydict[key],  
                        marker = dict(color = 'orchid'))
        layout = dict(height=400, title='Relationship between Safety Equipment and Severity of Accident', 
                      legend=dict(orientation="h"), barmode = 'stack', yaxis = dict(title = 'Percentage'),
                      xaxis = dict(title = 'Safety Equipment'));
        traces.append(trace1)
fig = go.Figure(data= traces, layout=layout)
iplot(fig)
# plt.savefig ( r'C:\Users\devan\OneDrive\Desktop\NumberOfAccidentPerMonth.jpg') 
In [ ]: